#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Purpose: Processes data on county GDP for analysis.

#Load packages
library(tidyverse)
library(tidylog)
library(here)
library(modelsummary)
library(kableExtra)

#Function to better format table for LaTeX
source(here("code", "fun", "fix_txt.R"))

#Load data
g <- read.csv(here("data", "input", "beacountygdp", "CAGDP9__ALL_AREAS_2001_2021.csv"))
#Subset to coal mining
g <- subset(g, IndustryClassification %in% c("21", "..."))
#Pivot longer
g <- g %>% pivot_longer(cols = X2001:X2021, names_to = "year", values_to = "gdp")
#Create year variable
g$year <- as.numeric(gsub("X", "", g$year))
#Create FIPS variable
g$GeoFIPS <- as.numeric(g$GeoFIPS)
#Remove non-county observations and missing FIPS
g <- subset(g, GeoFIPS != 0)
g <- subset(g, !is.na(GeoFIPS))
#Get total coal industry
g$coal <- ifelse(g$Description == "All industry total ", "all", ifelse(g$IndustryClassification == "21", "coal", NA))
#Remove non-coal areas
g <- subset(g, !is.na(coal))
#Pivot data set to wider
g.wide <- g %>%
  mutate(gdp = as.numeric(gdp)) %>%
  rename(fips = GeoFIPS) %>%
  group_by(coal, fips, year) %>%
  summarise(gdp = sum(gdp, na.rm = TRUE)) %>%
  pivot_wider(id_cols = c(fips, year), values_from = gdp, names_from = coal)
# Create percent of county GDP measure
g.wide$coal_gdp <- g.wide$coal / g.wide$all
#Subset to relevant variables
g.wide.out <- subset(g.wide, select = c(fips, year, coal_gdp))
#Save code output
saveRDS(g.wide.out, here("data", "inter", "gdp_coal.rds"))
#Create average in 2005, 2006, and 2007---the pre-shock period
g.wide <- g.wide %>%
  filter(year %in% c(2005,2006,2007)) %>%
  group_by(fips) %>%
  summarize(coal_gdp = mean(coal_gdp, na.rm = TRUE))
#Load employment data
cbp <- readRDS(here("data", "inter", "cbp.rds"))
#Coal employment averaged in 2005, 2006, 2007
cbp <- cbp %>%
  filter(year %in% c(2005,2006,2007)) %>%
  mutate(coal_prop = coal / emp_all) %>%
  group_by(fips) %>%
  summarize(coal_prop = mean(coal_prop))
#Merge together
econ.cor <- left_join(g.wide, cbp, by = "fips")
#Estimate correlation
#Table formatting
gof_map[gof_map$raw=="nobs",]$clean <- "$N$"
gof_map[gof_map$raw=="adj.r.squared",]$clean <- "Adjusted $R^2$"
#Estimate model for Table C2
m <- list()
#Regress share of coal for county GDP on an indicator for more than 1% county coal employment
m[[1]] <- lm(coal_gdp ~ I(coal_prop>=.01), econ.cor)
#Regress share of coal to county GDP on an indicator with more than 10% county coal employment
m[[2]] <- lm(coal_gdp ~ I(coal_prop>=.1), econ.cor)
#Regress share of coal to county GDP on the normalized share of county coal employment
m[[3]] <- lm(coal_gdp ~ scale(coal_prop), econ.cor)
#Specify file output
file_gdpcor <- here("output", "tables", "si_tab_C2_tab_gdpcor.txt")
#Create table
modelsummary(
  m,
  vcov = "HC2",
  stars = c("*"=.1,"**"=.05,"***"=.01),
  gof_map = gof_map,
  fmt = 2,
  gof_omit = "R2|IC|Log|Log|RM|St",
  coef_map = c(
    "(Intercept)"="Intercept",
    "I(coal_prop >= 0.001)TRUE"="Above 0.1\\% Coal Employment (=1)",
    "I(coal_prop >= 0.01)TRUE"="Above 1\\% Coal Employment (=1)",
    "I(coal_prop >= 0.1)TRUE"="Above 10\\% Coal Employment (=1)",
    "scale(coal_prop)"="Coal Employment"
    ),
  escape = FALSE,
  output = "latex"
  ) %>%
  cat(., file = file_gdpcor)
fix_txt(file_gdpcor)
